package us.codecraft.webmagic.example;

import java.util.Date;
import java.util.List;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
import us.codecraft.webmagic.selector.Type;

/**
 * Annotation-driven page model for blog posts under
 * {@code http://my.oschina.net/flashsword/blog/<digits>}.
 * <p>
 * Each field carries an {@link ExtractBy} rule naming the part of the page it
 * is taken from. The class exposes read-only accessors only; the fields are
 * presumably populated reflectively by the framework (no setters are declared
 * here).
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.3.2
 */
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {

	/** Text content of the page's {@code <title>} element (XPath rule). */
	@ExtractBy("//title/text()")
	private String title;

	/** Content selected by the CSS selector {@code div.BlogContent}. */
	@ExtractBy(value = "div.BlogContent", type = Type.css)
	private String content;

	/** Text of every link inside the {@code BlogTags} div; {@code multi = true} requests all matches. */
	@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
	private List<String> tags;

	/**
	 * Timestamp-shaped text (digits-digits-digits, whitespace, digits:digits)
	 * picked out of the {@code BlogStat} div by a regex step.
	 * NOTE(review): conversion of the matched text to {@link Date} is assumed
	 * to be handled by the framework - confirm the expected date format.
	 */
	@ExtractBy("//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')")
	private Date date;

	/**
	 * @return the extracted page title
	 */
	public String getTitle() {
		return this.title;
	}

	/**
	 * @return the extracted blog content
	 */
	public String getContent() {
		return this.content;
	}

	/**
	 * @return the extracted tag names
	 */
	public List<String> getTags() {
		return this.tags;
	}

	/**
	 * @return the extracted date of the post
	 */
	public Date getDate() {
		return this.date;
	}

	/**
	 * Runs a spider for this page model, seeded with the blog index URL.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		Site site = Site.me();
		// results will be saved to "/data/webmagic/" in json format
		JsonFilePageModelPipeline jsonPipeline = new JsonFilePageModelPipeline("/data/webmagic/");

		OOSpider.create(site, jsonPipeline, OschinaBlog.class)
				.addUrl("http://my.oschina.net/flashsword/blog")
				.run();
	}

}
